Python Programming Tutorials

Graph Clustering

I have the geodesic (shortest-path) distance matrix of a graph in .csv format:
index1 index2 index3 index4 index5 index6 index7 index8 index9 index10 index11 index12 index13 index14 index15 index16
index1 0 1 1 1 2 2 1 1 1 2 3 1 3 1 2 2
index2 1 0 1 2 3 3 2 2 2 3 4 2 4 2 3 3
index3 1 1 0 2 3 3 2 1 2 3 4 2 4 2 3 3
index4 1 2 2 0 1 1 2 2 1 1 2 1 2 2 2 3
index5 2 3 3 1 0 2 3 3 2 2 3 2 3 3 3 4
index6 2 3 3 1 2 0 3 3 2 1 1 2 2 3 3 4
index7 1 2 2 2 3 3 0 1 2 3 4 2 4 2 1 1
index8 1 2 1 2 3 3 1 0 1 2 4 2 3 2 2 2
index9 1 2 2 1 2 2 2 1 0 1 3 2 2 2 1 3
index10 2 3 3 1 2 1 3 2 1 0 2 2 1 3 2 4
index11 3 4 4 2 3 1 4 4 3 2 0 3 3 4 4 5
index12 1 2 2 1 2 2 2 2 2 2 3 0 3 2 3 3
index13 3 4 4 2 3 2 4 3 2 1 3 3 0 4 3 5
index14 1 2 2 2 3 3 2 2 2 3 4 2 4 0 3 3
index15 2 3 3 2 3 3 1 2 1 2 4 3 3 3 0 2
index16 2 3 3 3 4 4 1 2 3 4 5 3 5 3 2 0

I want to reduce it to 2D using Multidimensional Scaling (MDS) and then cluster the result using k-medoids.
This is my code:


# coding: utf-8
import numpy as np
import networkx as nx
import matplotlib.pyplot as pl
import csv
from sklearn import manifold
from sklearn.metrics.pairwise import pairwise_distances
import kmedoidss

# Load the geodesic distance matrix.  The first line of the file is the
# column header; in every other row the first field is the node label and
# the remaining fields are the integer distances to every node.
data, labels = [], []
with open('data.csv', 'r') as csv_file:
    reader = csv.reader(csv_file)
    next(reader, None)  # omit the header row
    for row in reader:
        if not row:
            continue  # tolerate blank lines such as a trailing newline
        labels.append(row[0])
        data.append([int(i) for i in row[1:]])
#print data

# Now run very basic MDS
# Documentation here: http://scikit-learn.org/dev/modules/generated/sklearn.manifold.MDS.html#sklearn.manifold.MDS
# dissimilarity="precomputed" tells MDS that `data` already is a distance
# matrix rather than a table of feature vectors.
mds = manifold.MDS(n_components=2, dissimilarity="precomputed")
pos = mds.fit_transform(data)

# Euclidean distance matrix between the embedded 2D points
D = pairwise_distances(pos, metric='euclidean')

# split into 3 clusters: M holds the medoid indices, C maps each cluster
# number to the indices of its members
M, C = kmedoidss.kMedoids(D, 3)

# Print every embedded point with its 1-based index
print('Data awal : ')
for index, point in enumerate(pos, 1):
    print(index, point)

print('n medoids:')
for point_idx in M:
    print('{} index ke - {} '.format(pos[point_idx], point_idx+1))

print('')
print('clustering result:')
for label in C:
    for point_idx in C[label]:
        print('cluster-  {}:{} index- {}'.format(label, pos[point_idx], point_idx+1))

kmedoidss.py


import numpy as np
import random

def kMedoids(D, k, tmax=100):
    """Partition points into ``k`` clusters with the k-medoids algorithm.

    Starting from ``k`` distinct, randomly chosen medoids, the function
    alternates between assigning every point to its nearest medoid and
    replacing each medoid by the cluster member with the smallest mean
    distance to the other members, until the medoids stop changing or
    ``tmax`` iterations have been performed.

    Parameters
    ----------
    D : numpy.ndarray
        Square matrix of pairwise distances; ``D[i, j]`` is the distance
        between points ``i`` and ``j``.
    k : int
        Number of clusters to build; must satisfy ``1 <= k <= D.shape[1]``.
    tmax : int, optional
        Maximum number of assign/update iterations (default 100).

    Returns
    -------
    M : numpy.ndarray
        Indices of the ``k`` medoids.
    C : dict
        Maps each cluster number ``0 .. k-1`` to a numpy array holding the
        indices of its members; cluster ``kappa`` belongs to ``M[kappa]``.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1 or larger than the number of points.
    """
    # determine dimensions of distance matrix D
    m, n = D.shape

    if not 1 <= k <= n:
        raise ValueError(
            'k must be between 1 and the number of points ({}), got {}'.format(n, k))

    # randomly initialize an array of k medoid indices; sampling WITHOUT
    # replacement guarantees distinct medoids (duplicates would leave
    # clusters empty and make the update step fail)
    M = np.sort(np.random.choice(n, k, replace=False))

    # create a copy of the array of medoid indices
    Mnew = np.copy(M)

    # initialize a dictionary to represent clusters
    C = {}
    for _ in range(tmax):
        # determine clusters, i.e. arrays of data indices: every point goes
        # to the medoid it is closest to
        nearest = np.argmin(D[:, M], axis=1)
        for kappa in range(k):
            C[kappa] = np.where(nearest == kappa)[0]
        # update cluster medoids
        for kappa in range(k):
            members = C[kappa]
            if members.size == 0:
                # can only happen when several points coincide; keep the
                # previous medoid instead of calling argmin on nothing
                continue
            mean_dist = np.mean(D[np.ix_(members, members)], axis=1)
            Mnew[kappa] = members[np.argmin(mean_dist)]
        # check for convergence
        if np.array_equal(M, Mnew):
            break
        M = np.copy(Mnew)
    else:
        # iteration budget exhausted without convergence: M was updated
        # after the last assignment, so do a final update of the cluster
        # memberships to keep C consistent with the returned M
        nearest = np.argmin(D[:, M], axis=1)
        for kappa in range(k):
            C[kappa] = np.where(nearest == kappa)[0]

    # return results
    return M, C

How can I visualize the clustering result as a graph, with each node colored according to the cluster it belongs to?

You must be logged in to post. Please login or register an account.